Install packages

if(!require("htmltools")) install.packages("htmltools") 
if(!require("remotes")) install.packages("remotes") 
if(!require("MultiEWCE")) remotes::install_github("neurogenomics/MutltiEWCE", dependencies = TRUE)

Prioritise targets

Filter & sort

res <- MultiEWCE::prioritise_targets()
## Prioritising gene targets.
## Importing existing file: ... phenotype_to_genes.txt
## Adding term definitions.
## Adding level-3 ancestor to each HPO ID.
## Prioritised targets: step='start' 
##  - rows: 435,204 
##  - phenotypes: 5,652 
##  - celltypes: 77
## Filtering @ q-value <= 0.05
## Prioritised targets: step='q_threshold' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Filtering @ fold-change >= 1
## Prioritised targets: step='fold_threshold' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Getting absolute ontology level for 2,580 HPO IDs.
## Prioritised targets: step='keep_ont_levels' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Annotating phenos with Onset.
## Importing existing file: ... phenotype.hpoa
## Importing existing file: ... phenotype.hpoa
## Prioritised targets: step='keep_onsets' 
##  - rows: 7,489 
##  - phenotypes: 2,520 
##  - celltypes: 77
## Annotating phenos with Tiers.
## Prioritised targets: step='keep_tiers' 
##  - rows: 722 
##  - phenotypes: 132 
##  - celltypes: 66
## Annotating phenos with Modifiers
## Prioritised targets: step='severity_threshold' 
##  - rows: 645 
##  - phenotypes: 123 
##  - celltypes: 66 
##  - diseases: 11
## Annotating phenotype frequencies.
## Prioritised targets: step='pheno_frequency_threshold' 
##  - rows: 564 
##  - phenotypes: 101 
##  - celltypes: 64 
##  - diseases: 11
## 33 / 64 of cell types kept.
## Prioritised targets: step='keep_celltypes' 
##  - rows: 353 
##  - phenotypes: 86 
##  - celltypes: 33 
##  - diseases: 11
## Filtering by gene size.
## Converting phenos to GRanges.
## Importing existing file: ... phenotype_to_genes.txt
## Loading required namespace: ensembldb
## Gathering gene metadata
## Loading required namespace: EnsDb.Hsapiens.v75
## Prioritised targets: step='keep_seqnames' 
##  - rows: 53,198 
##  - phenotypes: 86 
##  - genes: 3,601
## 217 / 3,601 genes kept.
## Prioritised targets: step='gene_size' 
##  - rows: 2,740 
##  - phenotypes: 84 
##  - genes: 217
## Prioritised targets: step='keep_biotypes' 
##  - rows: 2,740 
##  - phenotypes: 84 
##  - genes: 217
## Filtering by specificity_quantile.
## Filtering by mean_exp_quantile.
## Annotating gene frequencies.
## Importing existing file: ... genes_to_phenotype.txt
## Prioritised targets: step='gene_frequency_threshold' 
##  - rows: 11,290 
##  - phenotypes: 82 
##  - celltypes: 32 
##  - diseases: 10 
##  - genes: 217
## Prioritised targets: step='keep_specificity_quantiles' 
##  - rows: 307 
##  - phenotypes: 59 
##  - celltypes: 27 
##  - diseases: 10 
##  - genes: 62
## Prioritised targets: step='keep_mean_exp_quantiles' 
##  - rows: 307 
##  - phenotypes: 59 
##  - celltypes: 27 
##  - diseases: 10 
##  - genes: 62
## Sorting rows.
## Finding top 20 gene targets per: HPO_ID, CellType
## Prioritised targets: step='top_n' 
##  - rows: 307 
##  - phenotypes: 59 
##  - celltypes: 27 
##  - diseases: 10 
##  - genes: 62
## Prioritised targets: step='end' 
##  - rows: 307 
##  - phenotypes: 59 
##  - celltypes: 27 
##  - diseases: 10 
##  - genes: 62

Top targets

Here are the top gene targets based on the default filtering/sorting criterion of prioritise_targets.

MultiEWCE::create_dt(res$top_targets)
## Loading required namespace: DT

Filtering report

Here is a report that shows how many phenotypes/celltypes/genes remained after each step within the prioritise_targets pipeline.

MultiEWCE::create_dt(res$report)

Plot network

Generate a network from the top phenotype-celltype-gene associations.

vn_top <- MultiEWCE::prioritise_targets_network(top_targets = res$top_targets, 
                                                 save_path = here::here("reports",
                                                            "top_targets_network.html"),
                                                show_plot = FALSE)
## Loading required namespace: visNetwork
## Loading required namespace: igraph
## Creating network.
## Creating plot.
## Saving plot ==> /Users/schilder/Desktop/ewce/RareDiseasePrioritisation/reports/top_targets_network.html
# visNetwork::renderVisNetwork(vn_top$plot)
htmltools::includeHTML("https://github.com/neurogenomics/RareDiseasePrioritisation/raw/master/reports/top_targets_network.html")
visNetwork

Aggregate results

df_agg <- MultiEWCE::agg_results(phenos = res$top_targets,
                                 count_var = "CellType", 
                                 group_var = c("Phenotype","ancestor_name"))
## Aggregating results by group_var='Phenotype'Aggregating results by group_var='ancestor_name'
## Adding HPO IDs.
## Importing existing file: ... phenotype_to_genes.txt
MultiEWCE::create_dt(df_agg)

Aggregate results

Subset phenotypes to those included in intellectual disability, and are related to cognition.

df_intel <- res$top_targets[
    disease_characteristic=="Intellectual disability" &
    (!Phenotype %in% c("Choreoathetosis","Coma")),]

Top genes

top_genes <- sort(table(df_intel$Gene),
                  decreasing = TRUE)
print(top_genes)
## 
##       SOX3       SOX2      FOXH1     POU3F4       RTL1      FOXG1       GSX2 
##         12         10          7          7          6          4          4 
##     TUBB2A      GPR88       PIGY      PROP1    SLC18A3      ASCL1    MAB21L2 
##          4          3          3          3          3          2          2 
##       ROM1       SIX6 SNORD116-1   SNORD118       HES7      HOXA2    RIPPLY2 
##          2          2          2          2          1          1          1 
##        TRH 
##          1

Top cell types

top_celltypes <- sort(table(unique(df_intel[,c("Phenotype","HPO_ID","CellType")])$CellType),
         decreasing = TRUE)
print(top_celltypes)
## 
##  Excitatory neurons  Inhibitory neurons          Astrocytes            ENS glia 
##                   4                   4                   3                   3 
##      Ganglion cells     Granule neurons    Horizontal cells    Oligodendrocytes 
##                   3                   3                   3                   3 
##    Purkinje neurons    Visceral neurons      Amacrine cells       Bipolar cells 
##                   3                   2                   1                   1 
##         ENS neurons Photoreceptor cells 
##                   1                   1

Prioritise targets: extended

Now let’s lift some of the filters on phenotypes and cell types to recover a more extensive network.

Filter & sort

res_all <- MultiEWCE::prioritise_targets(keep_onsets = NULL,
                                         keep_tiers = NULL,
                                         pheno_frequency_threshold = c(10,NA),
                                         gene_frequency_threshold = c(10,NA),
                                         severity_threshold = NULL,
                                         keep_specificity_quantiles = seq(30,40),
                                         keep_mean_exp_quantiles = seq(30,40))
## Prioritising gene targets.
## Importing existing file: ... phenotype_to_genes.txt
## Adding term definitions.
## Adding level-3 ancestor to each HPO ID.
## Prioritised targets: step='start' 
##  - rows: 435,204 
##  - phenotypes: 5,652 
##  - celltypes: 77
## Filtering @ q-value <= 0.05
## Prioritised targets: step='q_threshold' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Filtering @ fold-change >= 1
## Prioritised targets: step='fold_threshold' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Getting absolute ontology level for 2,580 HPO IDs.
## Prioritised targets: step='keep_ont_levels' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Annotating phenos with Onset.
## Importing existing file: ... phenotype.hpoa
## Prioritised targets: step='keep_onsets' 
##  - rows: 7,648 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Annotating phenos with Tiers.
## Prioritised targets: step='keep_tiers' 
##  - rows: 7,656 
##  - phenotypes: 2,580 
##  - celltypes: 77
## Annotating phenos with Modifiers
## Prioritised targets: step='severity_threshold' 
##  - rows: 7,656 
##  - phenotypes: 2,580 
##  - celltypes: 77 
##  - diseases: 225
## Annotating phenotype frequencies.
## Prioritised targets: step='pheno_frequency_threshold' 
##  - rows: 7,574 
##  - phenotypes: 2,543 
##  - celltypes: 76 
##  - diseases: 225
## 37 / 76 of cell types kept.
## Prioritised targets: step='keep_celltypes' 
##  - rows: 3,855 
##  - phenotypes: 1,711 
##  - celltypes: 37 
##  - diseases: 177
## Filtering by gene size.
## Converting phenos to GRanges.
## Importing existing file: ... phenotype_to_genes.txt
## Gathering gene metadata
## Prioritised targets: step='keep_seqnames' 
##  - rows: 665,375 
##  - phenotypes: 1,711 
##  - genes: 4,329
## 289 / 4,329 genes kept.
## Prioritised targets: step='gene_size' 
##  - rows: 38,512 
##  - phenotypes: 1,485 
##  - genes: 289
## Prioritised targets: step='keep_biotypes' 
##  - rows: 38,512 
##  - phenotypes: 1,485 
##  - genes: 289
## Filtering by specificity_quantile.
## Filtering by mean_exp_quantile.
## Annotating gene frequencies.
## Importing existing file: ... genes_to_phenotype.txt
## Prioritised targets: step='gene_frequency_threshold' 
##  - rows: 114,312 
##  - phenotypes: 1,454 
##  - celltypes: 37 
##  - diseases: 170 
##  - genes: 289
## Prioritised targets: step='keep_specificity_quantiles' 
##  - rows: 16,296 
##  - phenotypes: 1,207 
##  - celltypes: 37 
##  - diseases: 149 
##  - genes: 245
## Prioritised targets: step='keep_mean_exp_quantiles' 
##  - rows: 16,296 
##  - phenotypes: 1,207 
##  - celltypes: 37 
##  - diseases: 149 
##  - genes: 245
## Sorting rows.
## Finding top 20 gene targets per: HPO_ID, CellType
## Prioritised targets: step='top_n' 
##  - rows: 15,303 
##  - phenotypes: 1,207 
##  - celltypes: 37 
##  - diseases: 149 
##  - genes: 245
## Prioritised targets: step='end' 
##  - rows: 15,303 
##  - phenotypes: 1,207 
##  - celltypes: 37 
##  - diseases: 149 
##  - genes: 245

Plot network

Note that we set physics=FALSE to reduce the computation that it takes to render the plot.

vn_all <- MultiEWCE::prioritise_targets_network(top_targets = res_all$top_targets, 
                                                save_path = here::here("reports",
                                                            "all_targets_network.html"), 
                                                physics = FALSE,  
                                                show_plot = FALSE)
## Creating network.
## Creating plot.
## Saving plot ==> /Users/schilder/Desktop/ewce/RareDiseasePrioritisation/reports/all_targets_network.html
htmltools::includeHTML("https://github.com/neurogenomics/RareDiseasePrioritisation/raw/master/reports/all_targets_network.html")
visNetwork

Aggregate results

all_agg <- MultiEWCE::agg_results(phenos = res_all$top_targets,
                                  count_var = "CellType", 
                                  group_var = "Phenotype")
## Aggregating results by group_var='Phenotype'
## Adding HPO IDs.
## Importing existing file: ... phenotype_to_genes.txt
MultiEWCE::create_dt(all_agg) 

Top phenotypes

Get the phenotypes that were enriched in the greatest number of cell types.

#### All ontology level ####
head(sort(table(unique(res_all$top_targets[,c("Phenotype","CellType")])$Phenotype), 
          decreasing = TRUE))
## 
##        Abnormality of eye movement            Intellectual disability 
##                                 16                                 16 
##         Abnormal muscle physiology               Abnormal muscle tone 
##                                 15                                 15 
## Abnormal nervous system physiology     Abnormality of the musculature 
##                                 15                                 15
#### Only lower ontology levels #####
all_targets <- HPOExplorer::add_ont_lvl(res_all$top_targets)
head(sort(table(unique(res_all$top_targets[ontLvl<=4,c("Phenotype","CellType")])$Phenotype), 
          decreasing = TRUE))
## 
##            Intellectual disability                          Hypotonia 
##                                 16                                 15 
##     Neurodevelopmental abnormality Abnormal involuntary eye movements 
##                                 15                                 14 
##     Neurological speech impairment                          Nystagmus 
##                                 14                                 14

Top cell types

Get the cell types that were enriched in the greatest number of unique phenotypes.

head(sort(table(unique(res_all$top_targets[,c("Phenotype","CellType")])$CellType), 
          decreasing = TRUE))
## 
##       Excitatory neurons Antigen presenting cells           Cardiomyocytes 
##                      219                      186                      167 
##                 ENS glia    Limbic system neurons           Ganglion cells 
##                      162                      153                      148

Top genes

Get the genes that were enriched in the greatest number of unique phenotypes.

head(sort(table(unique(res_all$top_targets[,c("Phenotype","Gene")])$Gene), 
          decreasing = TRUE))
## 
##     RMRP RNU4ATAC  TMEM107    FOXG1    FOXH1     SOX2 
##      188      183      152      148      146      145

Top ancestors

Get the most common ancestors in the results.

ancestor_freq <- sort(table(res_all$top_targets$ancestor), decreasing = TRUE) |>
    data.table::data.table() |> 
    `colnames<-`(c("HPO_ID","freq"))
ancestor_freq$Phenotype <- HPOExplorer::harmonise_phenotypes(phenotypes = ancestor_freq$HPO_ID)

MultiEWCE::create_dt(ancestor_freq) 

Session info

utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] MultiEWCE_0.1.3 remotes_2.4.2   htmltools_0.5.4
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3                    R.utils_2.12.2               
##   [3] tidyselect_1.2.0              RSQLite_2.2.20               
##   [5] AnnotationDbi_1.60.0          htmlwidgets_1.6.1            
##   [7] grid_4.2.1                    BiocParallel_1.32.5          
##   [9] munsell_0.5.0                 codetools_0.2-18             
##  [11] DT_0.27                       colorspace_2.1-0             
##  [13] Biobase_2.58.0                filelock_1.0.2               
##  [15] knitr_1.42                    rstudioapi_0.14              
##  [17] orthogene_1.4.1               stats4_4.2.1                 
##  [19] SingleCellExperiment_1.20.0   ggsignif_0.6.4               
##  [21] MatrixGenerics_1.10.0         GenomeInfoDbData_1.2.9       
##  [23] bit64_4.0.5                   rprojroot_2.0.3              
##  [25] coda_0.19-4                   vctrs_0.5.2                  
##  [27] treeio_1.22.0                 generics_0.1.3               
##  [29] xfun_0.37                     BiocFileCache_2.6.0          
##  [31] R6_2.5.1                      GenomeInfoDb_1.34.8          
##  [33] pals_1.7                      AnnotationFilter_1.22.0      
##  [35] bitops_1.0-7                  cachem_1.0.6                 
##  [37] gridGraphics_0.5-1            DelayedArray_0.24.0          
##  [39] assertthat_0.2.1              promises_1.2.0.1             
##  [41] BiocIO_1.8.0                  scales_1.2.1                 
##  [43] gtable_0.3.1                  ontologyPlot_1.6             
##  [45] ensembldb_2.22.0              rlang_1.0.6                  
##  [47] rtracklayer_1.58.0            rstatix_0.7.2                
##  [49] lazyeval_0.2.2                dichromat_2.0-0.1            
##  [51] broom_1.0.3                   BiocManager_1.30.19          
##  [53] yaml_2.3.7                    reshape2_1.4.4               
##  [55] HPOExplorer_0.99.3            abind_1.4-5                  
##  [57] GenomicFeatures_1.50.4        ggnetwork_0.5.10             
##  [59] crosstalk_1.2.0               backports_1.4.1              
##  [61] httpuv_1.6.8                  tools_4.2.1                  
##  [63] ggplotify_0.1.0               statnet.common_4.8.0         
##  [65] ggplot2_3.4.0                 ellipsis_0.3.2               
##  [67] jquerylib_0.1.4               paintmap_1.0                 
##  [69] BiocGenerics_0.44.0           Rcpp_1.0.10                  
##  [71] plyr_1.8.8                    visNetwork_2.1.2             
##  [73] progress_1.2.2                zlibbioc_1.44.0              
##  [75] purrr_1.0.1                   RCurl_1.98-1.10              
##  [77] prettyunits_1.1.1             ggpubr_0.5.0                 
##  [79] S4Vectors_0.36.1              SummarizedExperiment_1.28.0  
##  [81] grr_0.9.5                     here_1.0.1                   
##  [83] magrittr_2.0.3                data.table_1.14.6            
##  [85] ProtGenerics_1.30.0           matrixStats_0.63.0           
##  [87] hms_1.1.2                     patchwork_1.1.2              
##  [89] mime_0.12                     evaluate_0.20                
##  [91] xtable_1.8-4                  XML_3.99-0.13                
##  [93] EWCE_1.6.0                    IRanges_2.32.0               
##  [95] compiler_4.2.1                biomaRt_2.54.0               
##  [97] tibble_3.1.8                  maps_3.4.1                   
##  [99] crayon_1.5.2                  R.oo_1.25.0                  
## [101] ggfun_0.0.9                   later_1.3.0                  
## [103] tidyr_1.3.0                   aplot_0.1.9                  
## [105] DBI_1.1.3                     ExperimentHub_2.6.0          
## [107] gprofiler2_0.2.1              dbplyr_2.3.0                 
## [109] rappdirs_0.3.3                babelgene_22.9               
## [111] EnsDb.Hsapiens.v75_2.99.0     Matrix_1.5-3                 
## [113] car_3.1-1                     piggyback_0.1.4              
## [115] cli_3.6.0                     R.methodsS3_1.8.2            
## [117] parallel_4.2.1                igraph_1.3.5                 
## [119] GenomicRanges_1.50.2          pkgconfig_2.0.3              
## [121] GenomicAlignments_1.34.0      plotly_4.10.1                
## [123] xml2_1.3.3                    ggtree_3.6.2                 
## [125] bslib_0.4.2                   XVector_0.38.0               
## [127] yulab.utils_0.0.6             stringr_1.5.0                
## [129] digest_0.6.31                 graph_1.76.0                 
## [131] Biostrings_2.66.0             rmarkdown_2.20               
## [133] HGNChelper_0.8.1              tidytree_0.4.2               
## [135] restfulr_0.0.15               curl_5.0.0                   
## [137] shiny_1.7.4                   Rsamtools_2.14.0             
## [139] rjson_0.2.21                  lifecycle_1.0.3              
## [141] nlme_3.1-161                  jsonlite_1.8.4               
## [143] carData_3.0-5                 network_1.18.1               
## [145] mapproj_1.2.11                viridisLite_0.4.1            
## [147] limma_3.54.1                  fansi_1.0.4                  
## [149] pillar_1.8.1                  ontologyIndex_2.10           
## [151] lattice_0.20-45               homologene_1.4.68.19.3.27    
## [153] KEGGREST_1.38.0               fastmap_1.1.0                
## [155] httr_1.4.4                    interactiveDisplayBase_1.36.0
## [157] glue_1.6.2                    RNOmni_1.0.1                 
## [159] png_0.1-8                     ewceData_1.6.0               
## [161] BiocVersion_3.16.0            bit_4.0.5                    
## [163] Rgraphviz_2.42.0              stringi_1.7.12               
## [165] sass_0.4.5                    blob_1.2.3                   
## [167] AnnotationHub_3.6.0           memoise_2.0.1                
## [169] dplyr_1.1.0                   ape_5.6-2